{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "8f892c72",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "8bd6d60f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/root/nanot5-base-malaysian-cased-translation-v4-packing/checkpoint-75200'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers.trainer_utils import get_last_checkpoint\n",
    "\n",
    "# Training output directory; the last checkpoint folder found inside it is used below.\n",
    "output_dir = '/root/nanot5-base-malaysian-cased-translation-v4-packing'\n",
    "checkpoint = get_last_checkpoint(output_dir)\n",
    "checkpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "9674d7c0",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoTokenizer\n",
    "from transformers import T5ForConditionalGeneration\n",
    "\n",
    "# The tokenizer is read from the checkpoint folder resolved above.\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "12884fa3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the model weights from the same checkpoint folder as the tokenizer.\n",
    "model = T5ForConditionalGeneration.from_pretrained(pretrained_model_name_or_path=checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "94145841",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make generate() use the tokenizer's EOS id as its end-of-sequence token.\n",
    "generation_config = model.generation_config\n",
    "generation_config.eos_token_id = tokenizer.eos_token_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "8b7996f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_special_ids = [0, 1, 2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "0a4647f4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " The pair of algorithms used to perform encryption and decryption are known as\n",
      "A. keys (keys)\n",
      "B. Sifer (cipher)\n",
      "C. Sifler text (ciphertext)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "q = \"\"\"\n",
    "Pasangan algoritma yang digunakan untuk melakukan penyulitan dan nyahsulit dikenali sebagai\n",
    "A. kunci (keys)\n",
    "B. Sifer (cipher)\n",
    "C. Teks sifer (ciphertext)\n",
    "\"\"\"\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Inggeris: {q}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=200)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "063e6832",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Hai, ada sesiapa yang boleh saya bantu?\n"
     ]
    }
   ],
   "source": [
    "s = 'Hai, ada yang bisa saya bantu?'\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Melayu: {s}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=100)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "92c589fe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Hai, boleh saya tolong, saya model bahasa besar?\n"
     ]
    }
   ],
   "source": [
    "s = 'Hai, ada yang bisa saya bantu, saya adalah large language model?'\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Melayu: {s}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=100)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "0d3b7813",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Maaf, saya tidak dapat memahami permintaan anda. Bolehkah anda memberikan lebih banyak maklumat atau konteks tentang apa yang anda maksudkan dengan \"Pegangan Pertanian\"? Saya akan cuba membantu sebaik mungkin selepas saya mendapat maklumat yang lebih jelas.\n"
     ]
    }
   ],
   "source": [
    "s = \"\"\"\n",
    "Maaf, saya tidak dapat memahami permintaan Anda. Bisakah Anda memberikan lebih banyak informasi atau konteks tentang apa yang Anda maksud dengan \"Pegangan Pertanian\"? Saya akan mencoba membantu sebaik mungkin setelah saya mendapatkan informasi yang lebih jelas.\n",
    "\"\"\".strip()\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Melayu: {s}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=100)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "395d3576",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Saya seorang yang kuat pada 9-8-2023 06:13 PM. Siapa yang berminat untuk memohon rangkaian, bolehkah anda hubungi saya? Unfi, Time, Maxis? Saya boleh membantu anda menyelesaikannya dengan port full. Saya juga seorang penguji, jadi anda boleh bertanya saya tentang pemasangan atau masalah teknikal. Jika anda ingin memasang kabel fiber, anda boleh menghubungi saya untuk pertanyaan.\n"
     ]
    }
   ],
   "source": [
    "ch = \"\"\"\n",
    "\n",
    "本帖最后由 强者 于 9-8-2023 06:13 PM 编辑\n",
    "\n",
    "\n",
    "谁家需要兴趣申请网络，可以找我\n",
    "Unfi,time ,maxis?\n",
    "Port full 我帮你解决?\n",
    "小弟也是一位installer,所以安装或技术问题都可以问我。?\n",
    "新屋要拉fiber cable都可以咨询我\n",
    "\"\"\"\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Melayu: {ch}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=200)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "2173e966",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 26 DR. 27.10.2021\n",
      "\n",
      " \n",
      "PROPOSAL TO PRAY MEETING\n",
      "\n",
      "OBJECTIVES P.M. 18(1)\n",
      " \n",
      "\n",
      "DISPOSAL OF PETRONAS GAS ASSETS IN AZERBAIJAN\n",
      "\n",
      " \n",
      "\n",
      "11.33 a.m.\n",
      "\n",
      "Dato' Seri Anwar bin Ibrahim [Port Dickson]: Mr. President, I\n",
      "apply to submit a motion under Meeting Rules 18(1) and 18(2) of the Regulations-\n",
      "the rules of the Dewan Rakyat Meeting Council as follows:\n",
      "\n"
     ]
    }
   ],
   "source": [
    "hansard = \"\"\"\n",
    "26 DR.27.10.2021 \n",
    "\n",
    " \n",
    "USUL MENANGGUHKAN MESYUARAT  \n",
    "\n",
    "DI BAWAH P.M. 18(1) \n",
    " \n",
    "\n",
    "PENJUALAN ASET GAS PETRONAS DI AZERBAIJAN \n",
    "\n",
    " \n",
    "\n",
    "11.33 pg. \n",
    "\n",
    "Dato’ Seri Anwar bin Ibrahim [Port Dickson]: Tuan Yang di-Pertua, saya \n",
    "mohon mengemukakan usul di bawah Peraturan Mesyuarat 18(1) dan 18(2) Peraturan-\n",
    "peraturan Majlis Mesyuarat Dewan Rakyat seperti berikut: \n",
    "\"\"\"\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Inggeris: {hansard}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=200)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "5fb9a6f2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Pitakone, aku ora bisa ngerti panjaluke. Apa sampeyan bisa menehi informasi utawa konteks babagan apa sing sampeyan maksudake \"Pegangan Pertanian\"? Aku bakal nyoba mbantu kanthi paling akurat sawise aku entuk informasi sing luwih jelas.\n"
     ]
    }
   ],
   "source": [
    "s = \"\"\"\n",
    "Maaf, saya tidak dapat memahami permintaan Anda. Bisakah Anda memberikan lebih banyak informasi atau konteks tentang apa yang Anda maksud dengan \"Pegangan Pertanian\"? Saya akan mencoba membantu sebaik mungkin setelah saya mendapatkan informasi yang lebih jelas.\n",
    "\"\"\".strip()\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Jawa: {s}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=100)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "7f275f5a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " maaf gw ga bisa ngerti permintaan gw, bs gw banyakan info atau konteks apa yg gw maksud \"Pegangan Pertanian\"? gw akan coba bantu sebaik mungkin setelah gw mendapat informasi yg lebih jelas.\n"
     ]
    }
   ],
   "source": [
    "s = \"\"\"\n",
    "Maaf, saya tidak dapat memahami permintaan Anda. Bisakah Anda memberikan lebih banyak informasi atau konteks tentang apa yang Anda maksud dengan \"Pegangan Pertanian\"? Saya akan mencoba membantu sebaik mungkin setelah saya mendapatkan informasi yang lebih jelas.\n",
    "\"\"\".strip()\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke Banjar: {s}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=100)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "bb62d59c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " நீங்கள் உங்கள் நன்மையைப் பிடித்துக் கொள்ள முடியாது. \"அலையில் விவசாயிகள்\" என்று நீங்கள் என்னைப் பற்றி அறிவுருத்தினால் அல்ல என்\n"
     ]
    }
   ],
   "source": [
    "s = \"\"\"\n",
    "Maaf, saya tidak dapat memahami permintaan Anda. Bisakah Anda memberikan lebih banyak informasi atau konteks tentang apa yang Anda maksud dengan \"Pegangan Pertanian\"? Saya akan mencoba membantu sebaik mungkin setelah saya mendapatkan informasi yang lebih jelas.\n",
    "\"\"\".strip()\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "input_ids = tokenizer.encode(f'terjemah ke Tamil: {s}{tokenizer.eos_token}', return_tensors = 'pt')\n",
    "# A 100-token limit cut the Tamil translation off mid-sentence, so allow a longer\n",
    "# output; generation still ends early once the EOS token is produced.\n",
    "outputs = model.generate(input_ids, max_length = 512)\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [i for i in outputs[0] if i not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens = False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "82ecdfbb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Oi, aku tak boleh nak faham permintaan ko ni. Ko nak bagi lebih banyak info atau konteks pasal apa yang ko maksudkan dengan \"Pegangan Pertanian\"? Aku cuba tolong elok-elok je lepas aku dapat info yang lebih jelas.\n"
     ]
    }
   ],
   "source": [
    "s = \"\"\"\n",
    "Maaf, saya tidak dapat memahami permintaan Anda. Bisakah Anda memberikan lebih banyak informasi atau konteks tentang apa yang Anda maksud dengan \"Pegangan Pertanian\"? Saya akan mencoba membantu sebaik mungkin setelah saya mendapatkan informasi yang lebih jelas.\n",
    "\"\"\".strip()\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke negeri sembilan: {s}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=100)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "05d2606e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Ate, mike tak faham permintaan mike. Kome nak bagi lebih info ke ape yang mike maksudkan \"Pegangan Pertanian\" tu? Mike akan cuba tolong seme yang mike dah dapat maklumat yang lebih jelas.\n"
     ]
    }
   ],
   "source": [
    "s = \"\"\"\n",
    "Maaf, saya tidak dapat memahami permintaan Anda. Bisakah Anda memberikan lebih banyak informasi atau konteks tentang apa yang Anda maksud dengan \"Pegangan Pertanian\"? Saya akan mencoba membantu sebaik mungkin setelah saya mendapatkan informasi yang lebih jelas.\n",
    "\"\"\".strip()\n",
    "# Prompt = task prefix naming the target language + source text + explicit EOS token.\n",
    "prompt = f'terjemah ke perak: {s}{tokenizer.eos_token}'\n",
    "input_ids = tokenizer.encode(prompt, return_tensors='pt')\n",
    "generated = model.generate(input_ids, max_length=100)[0]\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [token_id for token_id in generated if token_id not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "2ee7f289",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['',\n",
       " ' Hi guys! I noticed that many people have received cookies yesterday and today. So today, I want to share some post mortem of our first batch:',\n",
       " \" It's true. This is not an expert, I also know.\",\n",
       " \" It's really crowded at KK market.\",\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " \" Imagine PH and winning the 14th general election. Then various backdoor events were happening. Finally, Ismail Sabri rose. That's why I don't give a fuck about politics anymore. I swear I've fked up.\"]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "strings = [\n",
    "    'ak tak paham la',\n",
    "    'Hi guys! I noticed semalam & harini dah ramai yang dapat cookies ni kan. So harini i nak share some post mortem of our first batch:',\n",
    "    \"Memanglah. Ini tak payah expert, aku pun tau. It's a gesture, bodoh.\",\n",
    "    'jam 8 di pasar KK memang org ramai 😂, pandai dia pilih tmpt.',\n",
    "    'Jadi haram jadah😀😃🤭',\n",
    "    'nak gi mana tuu',\n",
    "    'Macam nak ambil half day',\n",
    "    \"Bayangkan PH dan menang pru-14. Pastu macam-macam pintu belakang ada. Last-last Ismail Sabri naik. That's why I don't give a fk about politics anymore. Sumpah dah fk up dah.\",\n",
    "]\n",
    "# Build one prompt per sentence (task prefix + text + explicit EOS), encode each on its own,\n",
    "# then pad the batch to the longest sequence.\n",
    "prompts = [f'terjemah ke Inggeris: {s}{tokenizer.eos_token}' for s in strings]\n",
    "input_ids = [{'input_ids': tokenizer.encode(p, return_tensors='pt')[0]} for p in prompts]\n",
    "padded = tokenizer.pad(input_ids, padding='longest')\n",
    "outputs = model.generate(**padded, max_length=100)\n",
    "# Remove the ids in all_special_ids from every generated row before decoding.\n",
    "filtered = [[token_id for token_id in row if token_id not in all_special_ids] for row in outputs]\n",
    "tokenizer.batch_decode(filtered, spaces_between_special_tokens=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "8c8de382",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['',\n",
       " ' Hai semua! Saya perasan semalam dan hari ini ramai yang menerima biskut ini. Jadi hari ini saya ingin berkongsi beberapa post mortem tentang kumpulan pertama kami:',\n",
       " ' Memang. Ini tidak perlu pakar, saya juga tahu.',\n",
       " ' jam 8 di pasar KK memang ramai orang 😂',\n",
       " ' \"Jadi haram jadah\"',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " ' Bayangkan PH dan menang dalam pilihan raya umum ke-14. Kemudian pelbagai pintu belakang muncul. Akhirnya, Ismail Sabri naik. Itulah sebabnya saya tidak lagi memberi komen tentang politik. Saya sudah mula mengomentari.']"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "strings = [\n",
    "    'ak tak paham la',\n",
    "    'Hi guys! I noticed semalam & harini dah ramai yang dapat cookies ni kan. So harini i nak share some post mortem of our first batch:',\n",
    "    \"Memanglah. Ini tak payah expert, aku pun tau. It's a gesture, bodoh.\",\n",
    "    'jam 8 di pasar KK memang org ramai 😂, pandai dia pilih tmpt.',\n",
    "    'Jadi haram jadah😀😃🤭',\n",
    "    'nak gi mana tuu',\n",
    "    'Macam nak ambil half day',\n",
    "    'mu kecek molek cikit, mu nak isle ke nak air',\n",
    "    \"Bayangkan PH dan menang pru-14. Pastu macam-macam pintu belakang ada. Last-last Ismail Sabri naik. That's why I don't give a fk about politics anymore. Sumpah dah fk up dah.\",\n",
    "]\n",
    "# Build one prompt per sentence (task prefix + text + explicit EOS), encode each on its own,\n",
    "# then pad the batch to the longest sequence.\n",
    "prompts = [f'terjemah ke Melayu: {s}{tokenizer.eos_token}' for s in strings]\n",
    "input_ids = [{'input_ids': tokenizer.encode(p, return_tensors='pt')[0]} for p in prompts]\n",
    "padded = tokenizer.pad(input_ids, padding='longest')\n",
    "outputs = model.generate(**padded, max_length=100)\n",
    "# Remove the ids in all_special_ids from every generated row before decoding.\n",
    "filtered = [[token_id for token_id in row if token_id not in all_special_ids] for row in outputs]\n",
    "tokenizer.batch_decode(filtered, spaces_between_special_tokens=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "9b9b29dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Bah, kunuk sia mau bilang sama kamurang pasal kernel CUDA yang dibagi tu. Pertama-tama, kasi la guna memori sama untuk kurangkan latency akses memori global. Paham memori sama cepat dari memori global, dan dengan guna tu, benang dalam blok boleh kongsi data tanpa perlu akses memori global yang mahal. Dalam kes ni, kita boleh guna memori sama untuk bungkus array `d_a` sama `d_b` untuk setiap blok, lepas tu\n"
     ]
    }
   ],
   "source": [
    "cuda = \"\"\"\n",
    "To optimize the provided CUDA kernel, first, consider using shared memory to reduce global memory access latency. Shared memory is faster than global memory, and by using it, threads in a block can share data without requiring costly global memory access. In this case, we can use shared memory to load chunks of `d_a` and `d_b` arrays for each block, then perform calculations on these chunks. Here's an optimized version of the kernel using shared memory: ```cuda __global__ void optimized_pairwise_distance(value_t *d_output, const value_t *d_a, const value_t *d_b, int64_t size, distance::Sum sum_fn) { __shared__ value_t s_a[THREADS_PER_BLOCK]; __shared__ value_t s_b[THREADS_PER_BLOCK]; int64_t idx = blockIdx.x * blockDim.x + threadIdx.x; int64_t tid = threadIdx.x; int64_t grid_size = blockDim.x * gridDim.x; value_t *output_ptr = d_output + idx; // Load input data into shared memory if (tid < THREADS_PER_BLOCK && idx < size) { s_a[tid] = d_a[idx]; s_b[tid] = d_b[idx]; } // Synchronize threads to ensure data is loaded __syncthreads(); // Perform calculations using shared memory for (int64_t i = tid; i < THREADS_PER_BLOCK; i += grid_size) { if (i + idx < size) { output_ptr[0] = sum_fn(s_a[i], s_b[i + idx]); } } // Synchronize threads to ensure calculations are complete __syncthreads(); } ``` Now, to launch the kernel efficiently, use the following launch configuration: ```cuda constexpr int BLOCK_SIZE = 256; constexpr int THREADS_PER_BLOCK = BLOCK_SIZE; constexpr int NUM_BLOCKS = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; optimized_pairwise_distance<<<NUM_BLOCKS, THREADS_PER_BLOCK>>>(d_output, d_a, d_b, size, sum_fn); ``` Here, we've chosen a block size of 256 threads and adjusted the number of blocks to cover the entire input dataset. In addition to using shared memory, ensure you have proper memory access patterns. 
By loading data into shared memory, we implicitly reduced the need for coalesced global memory access since each block now works on its own local data. Keep in mind that the choice of distance function (`sum_fn`) may impact performance. Some functions may require more arithmetic operations than others, so you may want to analyze and choose the appropriate function based on the use case.\n",
    "\"\"\"\n",
    "# Prompt = task prefix naming the target dialect + source text + explicit EOS token.\n",
    "input_ids = tokenizer.encode(f'terjemah ke sabah: {cuda}{tokenizer.eos_token}', return_tensors = 'pt')\n",
    "# The source text is several paragraphs long; a 100-token limit stopped the translation\n",
    "# after the first few sentences, so give the decoder room for the whole passage.\n",
    "outputs = model.generate(input_ids, max_length = 1024)\n",
    "# Filter out the ids in all_special_ids, then decode what is left.\n",
    "outputs = [i for i in outputs[0] if i not in all_special_ids]\n",
    "print(tokenizer.decode(outputs, spaces_between_special_tokens = False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "15f3de74",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "04029a1bccd546c5bd54e0675144de0c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from huggingface_hub import login\n",
    "\n",
    "# No token is passed in code, so no credential is stored in the notebook source;\n",
    "# the call renders an interactive prompt instead.\n",
    "login(token=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "49342bc0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "87e25d7d2ba443e1a18538eb805be887",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/mesolitica/nanot5-base-malaysian-translation-v2/commit/57cebd22c04e2c3ffe4697a1e9dbfa5839e8750c', commit_message='Upload T5ForConditionalGeneration', commit_description='', oid='57cebd22c04e2c3ffe4697a1e9dbfa5839e8750c', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Upload the model weights. The organization is part of the repo id because the separate\n",
    "# `organization` argument is deprecated and scheduled for removal in Transformers v5.\n",
    "model.push_to_hub('mesolitica/nanot5-base-malaysian-translation-v2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "7b9fdd91",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/app/venv/lib/python3.11/site-packages/transformers/utils/hub.py:721: UserWarning: The `organization` argument is deprecated and will be removed in v5 of Transformers. Set your organization directly in the `repo_id` passed instead (`repo_id={organization}/{model_id}`).\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/mesolitica/nanot5-base-malaysian-translation-v2/commit/eb1adc437a5f5488e238d7f8192d7cdff53db07d', commit_message='Upload tokenizer', commit_description='', oid='eb1adc437a5f5488e238d7f8192d7cdff53db07d', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Upload the tokenizer files. The organization is part of the repo id because the separate\n",
    "# `organization` argument is deprecated and scheduled for removal in Transformers v5.\n",
    "tokenizer.push_to_hub('mesolitica/nanot5-base-malaysian-translation-v2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d873013c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
